#' Normalise the assigned high-school name (`liceu_repartizat`).
#'
#' Builds a lookup of the distinct (`judet`, `liceu_repartizat`) pairs,
#' applies a fixed, ORDER-DEPENDENT sequence of textual substitutions to the
#' name (upper-casing, quote and diacritic normalisation, repair of apparently
#' mis-encoded characters, expansion of abbreviations, town-name spelling
#' harmonisation) and merges the cleaned name back onto the input.
#'
#' The year tags in the section comments are carried over from the original
#' code; presumably they mark the admission-year files in which each problem
#' was first seen (TODO confirm).
#'
#' @param data_adm_raw Data frame with at least the columns `judet` and
#'   `liceu_repartizat`.
#' @param year Scalar admission year; only 2014 and 2019 trigger
#'   year-specific corrections.
#' @return `data_adm_raw` as returned by `base::merge()` (key columns first,
#'   rows sorted on the keys), where `liceu_repartizat` holds the cleaned
#'   name and the new column `liceu_repartizat_orig` holds the original one.
clean__adm__clean_liceu_repartizat <- function(data_adm_raw, year) {
  # Clean each distinct (judet, name) pair once, then merge back at the end.
  ms <- unique(data_adm_raw[, c("judet", "liceu_repartizat")])

  # All substitutions operate on this vector; upper-casing first means the
  # lower-case variants of the accented letters need no rules of their own.
  nm <- toupper(ms$liceu_repartizat)

  # 2017
  nm <- gsub("„", "\"", nm)
  nm <- gsub("’’", "\"", nm)
  nm <- gsub("’", "\"", nm)
  nm <- gsub(",,", "\"", nm)

  # 2016
  nm <- gsub("“", "\"", nm)
  nm <- gsub("”", "\"", nm)
  nm <- gsub("Á", "A", nm)
  nm <- gsub("Ă", "A", nm)
  nm <- gsub("Â", "A", nm)
  nm <- gsub("É", "E", nm)
  nm <- gsub("Î", "I", nm)
  nm <- gsub("Ó", "O", nm)
  nm <- gsub("Ö", "O", nm)
  nm <- gsub("Ő", "O", nm)
  nm <- gsub("Ș", "S", nm)
  nm <- gsub("Ş", "S", nm)
  nm <- gsub("Ț", "T", nm)
  nm <- gsub("Ţ", "T", nm)

  # 2014
  nm <- gsub("I. C. PETRESCU: STALPENI", "I. C. PETRESCU\", STALPENI", nm)

  # 2013
  nm <- gsub("‘ ", "\"", nm)

  # Change to uppercase (again, after the replacements above).
  nm <- toupper(nm)

  # 2018-2014: apparently mis-encoded quotes and diacritics.
  # NOTE(review): the three "A€Ž" rules look identical; kept in case the
  # original patterns differ by invisible characters - TODO confirm.
  nm <- gsub("A€Ť", "\"", nm)
  nm <- gsub("A€Ś", "\"", nm)
  nm <- gsub("A€Ž", "\"", nm)
  nm <- gsub("A€Ž", "\"", nm)
  nm <- gsub("A€Ž", "\"", nm)
  nm <- gsub("IA¿I", "IASI", nm)
  nm <- gsub(" ¿I ", " SI ", nm)
  nm <- gsub("¿IBANE¿TI", "TIBANESTI", nm)
  nm <- gsub("¿TEFAN", "STEFAN", nm)
  nm <- gsub("TIMI¿OARA", "TIMISOARA", nm)
  nm <- gsub("CONSTRUC¿II", "CONSTRUCTII", nm)
  nm <- gsub("NA¿IONAL", "NATIONAL", nm)
  nm <- gsub("NAAŻIONAL", "NATIONAL", nm)
  nm <- gsub("AŻTEFAN", "STEFAN", nm)
  nm <- gsub("AŽ", "A", nm)
  nm <- gsub("IAAŻI", "IASI", nm)
  nm <- gsub("AŻI ", "SI ", nm)
  nm <- gsub("CONSTRUCAŻII", "CONSTRUCTII", nm)
  nm <- gsub("AŻIBANEAŻTI", "TIBANESTI", nm)
  nm <- gsub("A‚", "A", nm)

  # 2017-2013: apostrophes become double quotes; literal "?" placeholders
  # are repaired word by word before the generic "?" rules further down.
  nm <- gsub("'", "\"", nm)
  nm <- gsub("A\\?\\?", "\"", nm)

  nm <- gsub("CA\\?MPULUNG", "CAMPULUNG", nm)
  nm <- gsub("CA\\?PULUNG", "CAMPULUNG", nm)
  nm <- gsub("SZENT LA\\?SZLA\\?", "SZENT LASZLO", nm)
  nm <- gsub("BRA\\?NCOVEANU", "BRANCOVEANU", nm)
  nm <- gsub(" A\\?I ", " SI ", nm)
  nm <- gsub("TA\\?RNAVENI", "TARNEVENI", nm)
  nm <- gsub("TA\\?NAVENI", "TARNEVENI", nm)
  nm <- gsub("STA\\?LPENI", "STALPENI", nm)
  nm <- gsub("STA\\?PENI", "STALPENI", nm)
  nm <- gsub("A\\?COLAR", "SCOLAR", nm)
  nm <- gsub("A\\?COALA", "SCOALA", nm)
  nm <- gsub("IVEA\\?TI", "IVESTI", nm)
  nm <- gsub("BRA\\?COVEANU", "BRANCOVEANU", nm)

  # Any remaining "?" in these positions is turned into a double quote.
  nm <- gsub("\\?$", "\"", nm, perl = TRUE)
  nm <- gsub(" \\?", "\"", nm)
  nm <- gsub("A\\?,", "\",", nm)
  nm <- gsub("\\?,", "\",", nm)
  nm <- gsub("\\? ", "\" ", nm)

  nm <- gsub("SILVIC\\?THEODOR", "SILVIC \"THEODOR", nm)
  nm <- gsub("_", "-", nm)

  # Expand school-type abbreviations.
  nm <- gsub("^SC\\.", "SCOALA", nm, perl = TRUE)
  nm <- gsub("^SC ", "SCOALA ", nm, perl = TRUE)
  nm <- gsub("SC CU", "SCOALA CU", nm)
  nm <- gsub("SC\\. CU", "SCOALA CU", nm)
  # NOTE(review): the "." below is unescaped and matches any character;
  # left as-is so existing outputs do not change.
  nm <- gsub("SC. I-VIII", "SCOALA I-VIII", nm)
  nm <- gsub("SC I-VIII", "SCOALA I-VIII", nm)

  nm <- gsub("GRUP ", "GRUPUL ", nm)
  nm <- gsub("GR\\. SC", "GRUPUL SC", nm)
  nm <- gsub("GR SC", "GRUPUL SC", nm)

  nm <- gsub("GRUPUL SC\\.", "GRUPUL SCOLAR", nm)
  nm <- gsub("GRUPUL SC ", "GRUPUL SCOLAR ", nm)
  nm <- gsub("CLS\\. ", "CLASELE ", nm)
  nm <- gsub("CLS ", "CLASELE ", nm)
  nm <- gsub("^COL\\.", "COLEGIUL", nm, perl = TRUE)
  nm <- gsub("^COL ", "COLEGIUL ", nm, perl = TRUE)

  nm <- gsub("LICEULT", "LICEUL T", nm)

  # 2019-2015
  # NOTE(review): these look like repeats of rules applied above; kept in
  # case the characters are different code points - TODO confirm.
  nm <- gsub("Ő", "O", nm)
  nm <- gsub("Ş", "S", nm)
  nm <- gsub("Ă", "A", nm)
  nm <- gsub("Ţ", "T", nm)

  # Spacing: a space after a "." that is directly followed by a letter, no
  # whitespace around dash punctuation, trimmed ends, single inner spaces.
  nm <- gsub("\\.([A-Za-z])", "\\. \\1", nm)
  nm <- gsub("\\s+(?=\\p{Pd})|(?<=\\p{Pd})\\s+", "", nm, perl = TRUE)
  nm <- gsub("\\s+", " ", trimws(nm))

  # Change remaining odd quote combinations into ".
  nm <- gsub("''-", "\" ", nm)
  nm <- gsub("\"-", "\" ", nm)
  nm <- gsub("''", "\"", nm)
  nm <- gsub("'", "\"", nm)

  # Town names: replace the old I-circumflex spelling with the A-circumflex
  # one, plus some other harmonisations.
  nm <- gsub("JIU\\.", "JIU,", nm)
  nm <- gsub("RM\\.", "RAMNICU", nm)
  nm <- gsub("TIRG", "TARG", nm)
  nm <- gsub("SFINT", "SFANT", nm)
  nm <- gsub("SINGEORGIU", "SANGEORGIU", nm)
  nm <- gsub("SINMARTIN", "SANMARTIN", nm)
  nm <- gsub("SINTANA", "SANTANA", nm)
  nm <- gsub("PINCOTA", "PANCOTA", nm)
  nm <- gsub("CIMPINA", "CAMPINA", nm)
  nm <- gsub("RISNOV", "RASNOV", nm)
  nm <- gsub("CIMPIA", "CAMPIA", nm)
  nm <- gsub("HIRSOVA", "HARSOVA", nm)
  nm <- gsub("CIMPULUNG", "CAMPULUNG", nm)
  nm <- gsub("PIATRA NEAMT", "PIATRA-NEAMT", nm)
  nm <- gsub("TARGU JIU", "TARGU-JIU", nm)
  nm <- gsub("FIERBINTI-TARG", "FIERBINTI", nm)
  nm <- gsub("HIRLAU", "HARLAU", nm)
  nm <- gsub("TIRNAVENI", "TARNEVENI", nm)
  nm <- gsub("RIMNICU", "RAMNICU", nm)
  nm <- gsub("CURTEA DE AG\\.", "CURTEA DE ARGES", nm)
  nm <- gsub("CURTEA DE AG", "CURTEA DE ARGES", nm)

  # Modifications that facilitate matching high schools across years.
  nm <- gsub("GR\\. SC\\.", "GRUPUL SCOLAR", nm)
  nm <- gsub("GRUP SCOLAR", "GRUPUL SCOLAR", nm)
  nm <- gsub("LIC\\.", "LICEUL", nm)
  nm <- gsub("TEHN\\.", "TEHNOLOGIC", nm)
  nm <- gsub("GRUP SC\\.", "GRUPUL SCOLAR", nm)
  nm <- gsub("AL\\. I\\.", "ALEXANDRU IOAN", nm)
  nm <- gsub("A\\. I\\.", "ALEXANDRU IOAN", nm)
  nm <- gsub("GH\\. M\\.", "GHEORGHE MUNTEANU", nm)
  nm <- gsub("GH\\.", "GHEORGHE", nm)
  nm <- gsub("BARTOK BELA", "BELA BARTOK", nm)
  nm <- gsub("C\\. BREDICEANU", "CORIOLAN BREDICEANU", nm)
  nm <- gsub("C\\. DIACONOVICI", "CONSTANTIN DIACONOVICI", nm)
  nm <- gsub("C\\. NEGRI", "COSTACHE NEGRI", nm)
  nm <- gsub("D\\. CANTEMIR", "DIMITRIE CANTEMIR", nm)
  nm <- gsub("G\\. VRANCEANU", "GHEORGHE VRANCEANU", nm)
  nm <- gsub("GR\\.", "GRIGORE", nm)
  nm <- gsub("M\\. EMINESCU", "MIHAI EMINESCU", nm)
  nm <- gsub("V\\. ALECSANDRI", "VASILE ALECSANDRI", nm)
  nm <- gsub("G\\. APOSTU", "GEORGE APOSTU", nm)
  nm <- gsub("A\\. SALIGNY", "ANGHEL SALIGNY", nm)
  nm <- gsub("C\\. D\\. NENITESCU", "COSTIN D\\. NENITESCU", nm)
  nm <- gsub("ED\\. NICOLAU", "EDMOND NICOLAU", nm)
  nm <- gsub("G-RAL", "GENERAL", nm)
  nm <- gsub("C-TIN", "CONSTANTIN", nm)
  nm <- gsub("D\\. PRAPORGESCU", "DAVID PRAPORGESCU", nm)
  nm <- gsub("AL\\.", "ALEXANDRU", nm)
  nm <- gsub("G\\. P\\.", "GEORGE POP", nm)
  nm <- gsub("I\\. ZOSSIMA", "IORDACHE ZOSSIMA", nm)
  nm <- gsub("TG\\.", "TARGU", nm)
  nm <- gsub("TG", "TARGU", nm)
  nm <- gsub("J\\. LEBEL", "JOHANNES LEBEL", nm)
  nm <- gsub("J\\. M\\. ELIAS", "JACQUES M\\. ELIAS", nm)
  nm <- gsub("C-TIN", "CONSTANTIN", nm)
  nm <- gsub("M\\. BASARAB", "MATEI BASARAB", nm)
  nm <- gsub("N\\. ONCESCU", "NICOLAE ONCESCU", nm)
  nm <- gsub("SF\\.", "SFANTUL", nm)
  nm <- gsub("T\\. VLADIMIRESCU", "TUDOR VLADIMIRESCU", nm)
  nm <- gsub("RM ", "RAMNICU ", nm)
  nm <- gsub("RM\\.", "RAMNICU", nm)
  nm <- gsub("I\\. L\\.", "ION LUCA", nm)
  nm <- gsub("I\\. C\\. DRAGUSANU", "ION CODRU DRAGUSANU", nm)
  nm <- gsub("J GREGOR TAJOVSKI", "JOZEF GREGOR TAJOVSKI", nm)
  nm <- gsub("S\\. HARET", "SPIRU HARET", nm)
  nm <- gsub("SF ", "SFANTU ", nm)
  nm <- gsub(" TIMIS$", "", nm)
  nm <- gsub("C\\. BRANCUSI", "CONSTANTIN BRANCUSI", nm)
  nm <- gsub("A\\. IANCU", "AVRAM IANCU", nm)
  nm <- gsub("J\\. KOZACEK", "JOZEF KOZACEK", nm)
  nm <- gsub("I\\. VULCAN", "IOSIF VULCAN", nm)
  nm <- gsub("M\\. VITEAZU", "MIHAI VITEAZUL", nm)
  nm <- gsub("S\\. VULCAN", "SAMUIL VULCAN", nm)
  nm <- gsub("A\\. SAGUNA", "ANDREI SAGUNA", nm)
  nm <- gsub("T\\. VUIA", "TRAIAN VUIA", nm)
  nm <- gsub("V\\. VOICULESCU", "VASILE VOICULESCU", nm)
  nm <- gsub("L\\. BLAGA", "LUCIAN BLAGA", nm)
  nm <- gsub("M\\. KOGALNICEANU", "MIHAIL KOGALNICEANU", nm)
  nm <- gsub("N\\. BOLCAS", "NICOLAE BOLCAS", nm)
  nm <- gsub("N\\. JIGA", "NICOLAE JIGA", nm)
  nm <- gsub("P\\. COSMA", "PARTENIE COSMA", nm)
  nm <- gsub("D\\. LEONIDA", "DIMITRIE LEONIDA", nm)
  nm <- gsub("O\\. GHIBU", "ONISIFOR GHIBU", nm)
  nm <- gsub("A\\. ROMAN", "ALEXANDRU ROMAN", nm)
  nm <- gsub("GHEORGHE MURGOCI", "GHEORGHE MUNTEANU MURGOCI", nm)
  nm <- gsub("N\\. COMANECI", "NADIA COMANECI", nm)
  nm <- gsub("CU PROGRAM SPORTIV", "SPORTIV", nm)
  nm <- gsub("CU PROGRAM DE ATLETISM", "SPORTIV", nm)
  nm <- gsub('"DIMITRIE TICHINDEAL"', '"PREPARANDIA-DIMITRIE TICHINDEAL"', nm)
  nm <- gsub("N\\. VASILESCU", "NICOLAE VASILESCU", nm)
  nm <- gsub("IND\\.", "INDUSTRIAL", nm)
  nm <- gsub("N PLESOIANU", "NICOLAE PLESOIANU", nm)
  nm <- gsub("C\\. ANGELESCU", "CONSTANTIN ANGELESCU", nm)
  nm <- gsub("DOBRESCU-ARGES", "DOBRESCU", nm)
  nm <- gsub("SZENT ERZSEBET", "SFANTA ELISABETA", nm)
  nm <- gsub("SAT CIORANII DE JOS\\. COMUNA CIORANI", "CIORANI", nm)
  nm <- gsub("CIORANII DE JOS", "CIORANI", nm)
  nm <- gsub("MANECIU-UNGURENI", "MANECIU", nm)
  nm <- gsub("SAT GHEABA\\. COMUNA MANECIU", "MANECIU", nm)
  nm <- gsub("ION I\\. C\\. BRATIANU", "ION CONSTANTIN BRATIANU", nm)
  nm <- gsub("I\\. C\\. BRATIANU", "ION CONSTANTIN BRATIANU", nm)
  nm <- gsub("ION C\\. BRATIANU", "ION CONSTANTIN BRATIANU", nm)
  nm <- gsub("TEHNLOGIC", "TEHNOLOGIC", nm)
  nm <- gsub("SINTANA", "SANTANA", nm)
  nm <- gsub("BOLINTIN VALE", "BOLINTIN-VALE", nm)

  # Remove the village ("SAT ...") part and keep the "COMUNA ..." part:
  # everything from "SAT " to the end is replaced by the text starting at
  # the last "COMUNA". Only names containing "SAT " can change, so only
  # those are visited; this also makes empty input safe (the previous
  # 1:length() loop failed on zero rows).
  # NOTE(review): a name with "SAT " but no "COMUNA" gets the whole name
  # substituted in, i.e. duplicated. Preserved as-is - TODO confirm intent.
  for (i in which(grepl("SAT ", nm, fixed = TRUE))) {
    from_comuna <- gsub(".+?(?=COMUNA)", "", nm[i], perl = TRUE)
    nm[i] <- gsub("(SAT ).*", from_comuna, nm[i], perl = TRUE)
  }

  # Year-specific corrections.
  if (year == 2014) {
    nm <- gsub("GALAT$", "GALATI", nm)
    nm <- gsub("TARGU-MURES", "TARGU MURES", nm)
  }
  if (year == 2019) {
    # which() drops NA counties, which made the former data-frame
    # subscripted assignment fail.
    mehedinti <- which(ms$judet == "MEHEDINTI")
    nm[mehedinti] <- gsub(
      "COLEGIUL TEHNOLOGIC$",
      "COLEGIUL TEHNIC DE TRANSPORTURI AUTO",
      nm[mehedinti]
    )
  }

  # Add a space before the first quotation mark.
  nm <- sub('(.*?)"', '\\1 "', nm)

  # Add a space after the first closing quotation mark that is not followed
  # by a comma, then collapse repeated whitespace.
  nm <- sub('(\\".*?)"(?!\\,)', '\\1" ', nm, perl = TRUE)
  nm <- gsub("\\s+", " ", nm)

  # Attach the cleaned names to every original row. The merged frame is
  # assumed to carry no dplyr grouping, so the helper column is dropped
  # with base R instead of a dplyr pipeline.
  ms$liceu_repartizat_new <- nm
  data_adm_raw <- base::merge(
    data_adm_raw, ms,
    by = c("judet", "liceu_repartizat"),
    all.x = TRUE
  )
  data_adm_raw$liceu_repartizat_orig <- data_adm_raw$liceu_repartizat
  data_adm_raw$liceu_repartizat <- trimws(data_adm_raw$liceu_repartizat_new)
  data_adm_raw$liceu_repartizat_new <- NULL

  data_adm_raw
}